library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Warning: package 'plotly' was built under R version 3.4.4
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.4.4
## 
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
## 
##     wind
library(ggplot2)
library(highcharter)
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
library(readr)

historical_web_data = readRDS("data\\historical_web_data_26112015.rds")
disaster = read_delim("data\\disaster.txt", "\t", escape_double = FALSE, trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   FLAG_TSUNAMI = col_character(),
##   SECOND = col_character(),
##   EQ_PRIMARY = col_double(),
##   EQ_MAG_MW = col_double(),
##   EQ_MAG_MS = col_double(),
##   EQ_MAG_MB = col_character(),
##   EQ_MAG_ML = col_double(),
##   EQ_MAG_MFA = col_character(),
##   EQ_MAG_UNK = col_double(),
##   COUNTRY = col_character(),
##   STATE = col_character(),
##   LOCATION_NAME = col_character(),
##   LATITUDE = col_double(),
##   LONGITUDE = col_double(),
##   MISSING = col_character(),
##   DAMAGE_MILLIONS_DOLLARS = col_character(),
##   TOTAL_MISSING = col_character(),
##   TOTAL_MISSING_DESCRIPTION = col_character(),
##   TOTAL_DAMAGE_MILLIONS_DOLLARS = col_character()
## )
## See spec(...) for full column specifications.
iran_earthquake = readRDS("data\\iran_earthquake.rds")
iran_map = read_rds("data\\Tehrn_map_6.rds")
worldwide = read.csv("data\\worldwide.csv")

plot1 <- plot_ly(historical_web_data, x = ~Latitude, y = ~Longitude, z = ~Depth, color = ~Magnitude, sizes = c(5, 550), size = ~Magnitude)
plot1
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning: package 'bindrcpp' was built under R version 3.4.4


disaster %>% filter(FLAG_TSUNAMI == "Tsu") %>% 
rename(lat = LATITUDE,lon = LONGITUDE, z = EQ_PRIMARY,name = COUNTRY,sequence = YEAR)  %>% 
  select(lat, lon, name,  z) -> dis 
hcmap() %>% 
  hc_add_series(data = dis, type = "mapbubble",
                minSize = 0, maxSize = 30)  %>% 
  hc_plotOptions(series = list(showInLegend = FALSE))


iran_big_earthquakes <- iran_earthquake %>% filter(Mag> 5)
#View(iran_big_earthquakes)
ggmap(iran_map) +  geom_point(aes(x = Long, y = Lat, size = Mag), data = iran_big_earthquakes,  alpha = .5, color="darkred") + geom_density_2d()
## Warning: Removed 1 rows containing missing values (geom_point).


firstup <- function(x) {
   substr(x, 1, 1) <- toupper(substr(x, 1, 1))
x
}

disaster %>% filter(is.na(FLAG_TSUNAMI)) %>% rowwise() %>% mutate(cCOUNTRY = firstup(tolower(COUNTRY))) %>% group_by(cCOUNTRY) %>% summarise(LATITUDE = mean(LATITUDE, na.rm = T), LONGITUDE = mean(LONGITUDE, na.rm = T), numbers = n(), death_sum = sum(DEATHS, na.rm =  T)) %>% rowwise() %>% mutate(meann = death_sum/numbers) -> data5
## Warning: Grouping rowwise data frame strips rowwise nature
View(data5)
hcmap(data = data5, value = "death_sum", joinBy = c("name", "cCOUNTRY")) 


disaster %>% filter(is.na(FLAG_TSUNAMI)) %>% select(id = I_D, lat = LATITUDE, long = LONGITUDE, death = DEATHS, foc = FOCAL_DEPTH, intensity = INTENSITY) -> data6
fit = lm(death ~lat + long + foc + intensity, data = data6)
print(summary(fit))
## 
## Call:
## lm(formula = death ~ lat + long + foc + intensity, data = data6)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -11822  -5647  -3353    500 230352 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -12551.48    4616.81  -2.719 0.006867 ** 
## lat             22.67      46.61   0.486 0.626998    
## long            23.33      12.78   1.825 0.068784 .  
## foc            -26.86      30.18  -0.890 0.374166    
## intensity     1994.26     549.01   3.632 0.000321 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17890 on 366 degrees of freedom
##   (3861 observations deleted due to missingness)
## Multiple R-squared:  0.05365,    Adjusted R-squared:  0.0433 
## F-statistic: 5.187 on 4 and 366 DF,  p-value: 0.0004487

We can see the coefficients in the summary of the fit model. ***


View(worldwide)
depth_worldwide = worldwide$depth
mag_worldwide= worldwide$mag
cor.test(depth_worldwide, mag_worldwide)
## 
##  Pearson's product-moment correlation
## 
## data:  depth_worldwide and mag_worldwide
## t = 50.03, df = 60629, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1914584 0.2067469
## sample estimates:
##       cor 
## 0.1991147
As we can see the P_value for H0 (correlation between depth and mag is 0) is very low. So we can reject the null hypothesis and conclude that there is a correlation between depth and magnitude.


View(disaster)
library(stringr)
## Warning: package 'stringr' was built under R version 3.4.4
View(worldwide)
worldwide %>% rowwise() %>% mutate(year = unlist(str_split(time, "-"))[1], country = unlist(str_split(place, ","))[2]) %>% filter(type == "earthquake") %>% group_by(country, year) %>% summarise(numbers = n()) %>% group_by(country) %>% summarise(summs = sum(numbers), nn = n(), mean_by_year = sum(numbers)/n()) %>% arrange(-mean_by_year) %>% slice(1:60) -> data9
## Warning: Grouping rowwise data frame strips rowwise nature
data9 %>%  ggplot() + geom_bar(aes(x = reorder(country, mean_by_year), y = mean_by_year), stat = "identity") + theme(axis.text.x = element_text(angle = 90))
These are only 60 countries with most earthquakes. We can see that even some U.S. states have more earthquakes than iran. Also countries like Japan and New Zealand which are allies of the U.S. have high earthquake rates. And not all of the enemies of U.S. are in the chart. So we can reject the Harp theory.


  1. We check if there is a relation between mag and gap of an earthquake.

gap_worldwide = worldwide$gap
mag_worldwide= worldwide$mag
cor.test(gap_worldwide, mag_worldwide)
## 
##  Pearson's product-moment correlation
## 
## data:  gap_worldwide and mag_worldwide
## t = -94.188, df = 54620, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3809926 -0.3665636
## sample estimates:
##        cor 
## -0.3738007

We can see there is a correlation between gap and magnitude.

Now we will check if the mean number of deaths in a earthquake and a tsunami is the same:

Tsunami <- disaster %>% filter(FLAG_TSUNAMI == "Tsu") %>% select(DEATHS) %>%  as.vector() %>% filter(is.na(DEATHS) == F)
earthQuake <-Tsunami <- disaster %>% filter(is.na(FLAG_TSUNAMI) == T) %>% select(DEATHS) %>%  as.vector() %>% filter(is.na(DEATHS) == F) 
p = data.frame(Tsunami = Tsunami$DEATHS, earthQuake = earthQuake$DEATHS)
aov (Tsunami ~ earthQuake, data = p) -> q
summary(q)
##               Df    Sum Sq   Mean Sq   F value Pr(>F)    
## earthQuake     1 1.061e+12 1.061e+12 6.851e+32 <2e-16 ***
## Residuals   1588 0.000e+00 0.000e+00                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

According to the P-value, we can say the means of deaths of earthquakes and Tsunamies are different.

And now we will see if there is a relation between magnitude and its error:

err_worldwide = worldwide$magError
mag_worldwide= worldwide$mag
cor.test(err_worldwide, mag_worldwide)
## 
##  Pearson's product-moment correlation
## 
## data:  err_worldwide and mag_worldwide
## t = -25.645, df = 48514, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1244214 -0.1068628
## sample estimates:
##        cor 
## -0.1156511

And as we can see, there is a relation between these two parameters.